{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "181edd2d-67d4-43e4-9a89-327eaff26177",
   "metadata": {},
   "source": [
    "# Grammar and Vocab AI Checker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4be465e2-16fc-4b34-a771-d23f05edbc14",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the %pip magic (not a bare `pip install`) so the package is installed\n",
    "# into the environment of the running kernel and the cell does not rely on automagic\n",
    "%pip install PyMuPDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66b371fb-f4ea-4ced-8ad2-4229892e0647",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import os\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display\n",
    "from openai import OpenAI\n",
    "import fitz # PyMuPDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41068273-4325-4de2-b11d-37d2831b1a47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load environment variables from a file called .env\n",
    "\n",
    "load_dotenv(override=True)\n",
    "api_key = os.getenv('OPENAI_API_KEY')\n",
    "\n",
    "# Check the key.\n",
    "# The whitespace test runs before the prefix test so that a key with leading\n",
    "# spaces or tabs is reported as a whitespace problem rather than a wrong prefix.\n",
    "\n",
    "if not api_key:\n",
    "    print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
    "elif api_key.strip() != api_key:\n",
    "    print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
    "elif not api_key.startswith(\"sk-proj-\"):\n",
    "    print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
    "else:\n",
    "    print(\"API key found and looks good so far!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba003970-0cc9-4e11-8702-0b120f378fa4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the OpenAI client used for the chat completion request below;\n",
    "# no API key is passed explicitly here\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "faa89067-fcee-4950-b4ce-3faec640c79b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt: defines the checker's role and the report it must produce\n",
    "# (percentage correct, word and character counts, bullet-point fixes with locations).\n",
    "system_prompt = (\n",
    "    \"You are a spell, grammar, and vocabulary checker. \"\n",
    "    \"You check for any mistakes in terms of spelling, grammar, and vocabulary of texts or files that are given to you. \"\n",
    "    \"You provide a response with the percentage of the text that is correct in terms of spelling, vocab, and grammar, \"\n",
    "    \"as well as the total number of words and characters in the file or text that you are checking. \"\n",
    "    \"You also provide instructions in bullet points on how to fix the mistakes and where they are.\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de32a94d-9c1b-4e1a-a1b9-78d3180c0d79",
   "metadata": {},
   "outputs": [],
   "source": [
    "# user_prompt = \"Hi, mw namw is kkkdvin. How are y,?\" # Uncomment this to test the implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "272f379d-3471-488d-ba27-bbffff961d72",
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_pdf_text_to_string(pdf_path):\n",
    "    \"\"\"\n",
    "    Extract all text from a PDF file and return it as a single string.\n",
    "\n",
    "    Args:\n",
    "        pdf_path (str): The path to the PDF file.\n",
    "\n",
    "    Returns:\n",
    "        str | None: The text of every page concatenated in page order, or\n",
    "        None if the file could not be opened or read (the error is printed).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # The context manager closes the document even if a page fails to extract\n",
    "        with fitz.open(pdf_path) as doc:\n",
    "            return \"\".join(page.get_text() for page in doc)\n",
    "    except Exception as e:\n",
    "        # Best-effort: report the problem and signal failure with None\n",
    "        print(f\"Error processing PDF: {e}\")\n",
    "        return None\n",
    "\n",
    "pdf_file_path = \"gram-vocab-test.pdf\"  # Replace with the actual path to your PDF\n",
    "user_prompt = extract_pdf_text_to_string(pdf_file_path)\n",
    "\n",
    "# Stop here if extraction failed (None) or produced no text; otherwise the\n",
    "# literal text \"None\" or an empty string would be sent to the model for checking\n",
    "if not user_prompt or not user_prompt.strip():\n",
    "    raise ValueError(f\"No text could be extracted from {pdf_file_path!r}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07a839f6-c508-4b94-98ec-877c19023e58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Conversation payload: the system message sets the checker's behaviour,\n",
    "# the user message carries the extracted text to be checked\n",
    "messages = [\n",
    "    dict(role=\"system\", content=system_prompt),\n",
    "    dict(\n",
    "        role=\"user\",\n",
    "        content=f\"This is the text to check for grammar, vocab, and spelling errors: {user_prompt}\",\n",
    "    ),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a642cb62-9016-4957-a74e-9f97f8c495a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Send the conversation to the chat completions endpoint\n",
    "response = openai.chat.completions.create(\n",
    "    model=\"gpt-4o-mini\",\n",
    "    messages=messages,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ce6b006-19b6-48b4-b344-b4b57b8c1438",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the reply as Markdown so the bullet points requested in the system\n",
    "# prompt are shown as a formatted list instead of raw text\n",
    "display(Markdown(response.choices[0].message.content))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54bc23cd-f59c-4b4d-bc3e-60f273692d92",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
